In this document we explore and fit a model (using Bayesian methods) that describes the weight-dependent intake rate relationship for salmon. The model includes competitive effects and intrinsic fish variation.
This model will later be incorporated into a subsequent model that tries to predict fish growth.
library(tidyverse) # data frame functionality
library(cowplot) # create multi-plots
library(rstan)
library(bayesplot)
rm(list = ls()) # clear memory
# read in data and wrangle
# group feed consumption data
df_group <- read_csv("100S_feed_provided.csv")
df_group$tank <- factor(df_group$tank)
# individual feed consumption data
df_indiv <- read_csv("100S_fish_growth.csv") %>%
select(c(2,3,4,5,7))
# set up individual data frame
df_indiv$ID <- factor(df_indiv$ID)
df_indiv$tank <- factor(df_indiv$tank)
df_indiv$days[which(df_indiv$days == 276)] <- 274
# add appropriate mean weights
df_indiv <- df_indiv %>%
left_join(select(df_group, tank, days, w_bar), by = c("days", "tank"))
# only keep data where intake is known and non-zero
df_indiv <- df_indiv %>%
filter(!is.na(intake), intake > 0)p1 <- ggplot(df_indiv) +
geom_point(aes(x = w, y = intake, color = tank)) +
labs(x = "Weight (g)", y = "Intake (g/ind.)") +
theme_bw()
p2 <- ggplot(df_indiv) +
geom_point(aes(x = w, y = intake, color = tank)) +
labs(x = "Weight (g)", y = "Intake (g/ind.)") +
facet_wrap( ~ tank) +
theme_bw()
plot_grid(p1, p2, ncol = 1)p1 <- ggplot(df_indiv) +
geom_point(aes(x = w, y = intake, color = tank)) +
labs(x = "Weight (g)", y = "Intake (g/ind.)") +
scale_x_log10() + scale_y_log10() +
theme_bw()
p2 <- ggplot(df_indiv) +
geom_point(aes(x = w, y = intake, color = tank), alpha = 0.1) +
labs(x = "Weight (g)", y = "Intake (g/ind.)") +
facet_wrap( ~ tank) +
scale_x_log10() + scale_y_log10() +
theme_bw()
plot_grid(p1, p2, ncol = 1)ggplot(df_indiv) +
geom_point(aes(x = w, y = intake, color = (w - w_bar)/w_bar)) +
labs(x = "Weight (g)", y = "Intake (g/ind.)",
color = "Relative\nweight") +
scale_x_log10() + scale_y_log10() +
scale_color_gradient2(low = "red", mid = "white", high = "blue", midpoint = 0) +
theme_bw() +
theme(
panel.background = element_rect(fill = "grey10"),
panel.grid = element_line(colour = "grey35")
)df_indiv <- df_indiv %>%
mutate(ln_w = log(w))
df_indiv$ln_w_bin <- cut(df_indiv$ln_w, breaks = 8)
df_summary <- df_indiv %>%
group_by(ln_w_bin, tank) %>%
summarise(.groups = "drop",
n = n(),
median_w = median(w),
intake_mean = mean(intake),
intake_var = var(intake),
intake_100 = quantile(intake, probs = 0.1),
intake_900 = quantile(intake, probs = 0.9),
cov = sd(intake)/intake_mean) %>%
filter(n >= 3)ggplot(df_summary) +
geom_ribbon(aes(x = median_w, ymin = intake_100, ymax = intake_900,
fill = tank)) +
geom_point(aes(x = median_w, y = intake_mean)) +
labs(x = "Mean weight (g)", y = "Mean intake (g/ind.)") +
facet_wrap( ~ tank) +
scale_x_log10() + scale_y_log10() +
theme_bw()p1 <- ggplot(df_summary) +
geom_point(aes(x = median_w, y = cov, color = tank)) +
labs(x = "Weight (g)", y = "Coefficient of variation") +
theme_bw()
p2 <- ggplot(df_summary) +
geom_point(aes(x = intake_mean, y = cov, color = tank)) +
labs(x = "Mean intake (g/ind.)", y = "Coefficient of variation") +
theme_bw()
plot_grid(p1,p2)ggplot(df_indiv) +
geom_histogram(aes(x = intake),
binwidth = 2, fill = "lightblue", color = "blue") +
facet_grid(tank ~ ln_w_bin) +
labs(x = "Mean intake (g/ind.)") +
theme_bw()df_growth <- df_indiv %>%
select(ID, tank, days, w, w_bar) %>%
na.omit() %>%
mutate(weight_ratio = w / w_bar) %>%
arrange(tank, ID, days)
df_growth$dt <- NA
df_growth$delta_w_dt <- NA
df_growth$delta_w_1 <- NA
current_id <- df_growth$ID[1]
for (j in 2:nrow(df_growth)) {
if (df_growth$ID[j] == current_id) { # prior data is present
df_growth$dt[j-1] <- df_growth$days[j] - df_growth$days[j-1]
df_growth$delta_w_dt[j-1] <- df_growth$w[j] - df_growth$w[j-1]
df_growth$delta_w_1[j-1] <- df_growth$delta_w_dt[j-1] / df_growth$dt[j-1]
}
current_id <- df_growth$ID[j]
}df_plot <- df_growth %>%
select(tank, ID, days, w, weight_ratio, delta_w_1) %>%
arrange(tank, ID, days) %>%
na.omit()
ggplot(df_plot) +
geom_hline(yintercept = 0, color = "white") +
geom_point(aes(x = w, y = delta_w_1, color = weight_ratio)) +
scale_colour_gradient2(
low = "red",
mid = "grey85",
high = "blue",
midpoint = 1
) +
facet_wrap( ~ tank) +
scale_x_log10() +
labs(x = "Weight (g)", y = "Estimated daily changein weigfht (g)") +
theme_bw() +
theme(
panel.background = element_rect(fill = "grey10"),
panel.grid = element_line(colour = "grey35")
)df_intake_group <- df_group %>%
mutate(intake_ind = feed_consumed_g / fish_no, fish_no_F = factor(fish_no))
df_intake_indiv <- df_indiv %>%
select(ID, tank, days, w, intake) %>%
na.omit() %>%
filter(intake > 0)
ggplot() +
geom_point(data = df_intake_indiv,
aes(x = w, y = intake), color = "blue", alpha = 1) +
geom_point(data = df_intake_group,
aes(x = w_bar, y = intake_ind), color = "red", alpha = 1) +
labs(
x = "Weight (g)", y = "Intake (g/ind.)", subtitle = "Red = group, blue = individual") +
scale_x_log10() +
scale_y_log10() +
facet_wrap( ~ tank) +
theme_bw()Relationships: * a power-law relationship between mass and intake * a
power-law relationship between mass and the coefficient of variation of
intake
Variation: * gamma distribution of variation in observed intake from the
predicted mean * between-individuals * variation in expected intake
among fish is described by a log-normal * between-samples * variation in
expected intake among samples is described by a log-normal
# prepare the data (stan wants a list of data not a data frame)
df_fit <- df_indiv %>%
select(ID, tank, days, w, w_bar, intake) %>%
filter(intake > 0) %>%
na.omit() %>%
mutate(
w_rel = (w - w_bar)/w_bar,
j = as.integer(factor(ID)),
sample = factor(paste(tank, "_", days, sep = "")),
k = as.integer(sample))
I <- nrow(df_fit) # number of observations
J <- max(df_fit$j) # number of fish
K <- max(df_fit$k) # number of fish
w_std <- 600.0 # standard weight (a1 = intake, cov = a2)
stan_dat <- list(
I = I, # number of observations
J = J, # number of fish
K = K, # number of samples
w_std = 600.0, # standardised fish weight (should be a typical weight)
j = df_fit$j, # fish
k = df_fit$k, # sample
w = df_fit$w, # weight
w_rel = df_fit$w_rel, # relative weight
Intake = df_fit$intake) # intake# fit the model!
fit_n <- stan(file = 'IntakeFit.stan', data = stan_dat,
iter = 500, warmup = 250, chains = 4, refresh = 50, seed = 42)## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/usr/local/include -fPIC -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
##
## SAMPLING FOR MODEL 'IntakeFit' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.001505 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 15.05 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 500 [ 0%] (Warmup)
## Chain 1: Iteration: 50 / 500 [ 10%] (Warmup)
## Chain 1: Iteration: 100 / 500 [ 20%] (Warmup)
## Chain 1: Iteration: 150 / 500 [ 30%] (Warmup)
## Chain 1: Iteration: 200 / 500 [ 40%] (Warmup)
## Chain 1: Iteration: 250 / 500 [ 50%] (Warmup)
## Chain 1: Iteration: 251 / 500 [ 50%] (Sampling)
## Chain 1: Iteration: 300 / 500 [ 60%] (Sampling)
## Chain 1: Iteration: 350 / 500 [ 70%] (Sampling)
## Chain 1: Iteration: 400 / 500 [ 80%] (Sampling)
## Chain 1: Iteration: 450 / 500 [ 90%] (Sampling)
## Chain 1: Iteration: 500 / 500 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 8.74224 seconds (Warm-up)
## Chain 1: 21.2864 seconds (Sampling)
## Chain 1: 30.0286 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'IntakeFit' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000771 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 7.71 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 500 [ 0%] (Warmup)
## Chain 2: Iteration: 50 / 500 [ 10%] (Warmup)
## Chain 2: Iteration: 100 / 500 [ 20%] (Warmup)
## Chain 2: Iteration: 150 / 500 [ 30%] (Warmup)
## Chain 2: Iteration: 200 / 500 [ 40%] (Warmup)
## Chain 2: Iteration: 250 / 500 [ 50%] (Warmup)
## Chain 2: Iteration: 251 / 500 [ 50%] (Sampling)
## Chain 2: Iteration: 300 / 500 [ 60%] (Sampling)
## Chain 2: Iteration: 350 / 500 [ 70%] (Sampling)
## Chain 2: Iteration: 400 / 500 [ 80%] (Sampling)
## Chain 2: Iteration: 450 / 500 [ 90%] (Sampling)
## Chain 2: Iteration: 500 / 500 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 6.36567 seconds (Warm-up)
## Chain 2: 5.09602 seconds (Sampling)
## Chain 2: 11.4617 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'IntakeFit' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.00077 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 7.7 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 500 [ 0%] (Warmup)
## Chain 3: Iteration: 50 / 500 [ 10%] (Warmup)
## Chain 3: Iteration: 100 / 500 [ 20%] (Warmup)
## Chain 3: Iteration: 150 / 500 [ 30%] (Warmup)
## Chain 3: Iteration: 200 / 500 [ 40%] (Warmup)
## Chain 3: Iteration: 250 / 500 [ 50%] (Warmup)
## Chain 3: Iteration: 251 / 500 [ 50%] (Sampling)
## Chain 3: Iteration: 300 / 500 [ 60%] (Sampling)
## Chain 3: Iteration: 350 / 500 [ 70%] (Sampling)
## Chain 3: Iteration: 400 / 500 [ 80%] (Sampling)
## Chain 3: Iteration: 450 / 500 [ 90%] (Sampling)
## Chain 3: Iteration: 500 / 500 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 7.44511 seconds (Warm-up)
## Chain 3: 4.78214 seconds (Sampling)
## Chain 3: 12.2273 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'IntakeFit' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 0.000765 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 7.65 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 500 [ 0%] (Warmup)
## Chain 4: Iteration: 50 / 500 [ 10%] (Warmup)
## Chain 4: Iteration: 100 / 500 [ 20%] (Warmup)
## Chain 4: Iteration: 150 / 500 [ 30%] (Warmup)
## Chain 4: Iteration: 200 / 500 [ 40%] (Warmup)
## Chain 4: Iteration: 250 / 500 [ 50%] (Warmup)
## Chain 4: Iteration: 251 / 500 [ 50%] (Sampling)
## Chain 4: Iteration: 300 / 500 [ 60%] (Sampling)
## Chain 4: Iteration: 350 / 500 [ 70%] (Sampling)
## Chain 4: Iteration: 400 / 500 [ 80%] (Sampling)
## Chain 4: Iteration: 450 / 500 [ 90%] (Sampling)
## Chain 4: Iteration: 500 / 500 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 5.95893 seconds (Warm-up)
## Chain 4: 5.4562 seconds (Sampling)
## Chain 4: 11.4151 seconds (Total)
## Chain 4:
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 1.31, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
# check out some predicted model parameters and log-likelihood
model_par <- c("a1", "b1", "c1", "a2", "b2", "sigma_j", "sigma_k")
# check for chain convergence
rstan::traceplot(object = fit_n, pars = model_par, inc_warmup = TRUE,
ncol = 3)mcmc_hist(fit_n, pars = model_par) # show histograms of posterior parameter estimatesl_par <- rstan::extract(fit_n, pars = c("a1", "b1", "c1", "a2", "b2"))
df_predict <- tibble(
w = seq(from = min(df_fit$w), to = max(df_fit$w), length.out = 100)
) %>%
mutate(I_025 = 0.0, I_500 = 0.0, I_975 = 0.0, cv_025 = 0.0, cv_500 = 0.0, cv_975 = 0.0)
for (i in 1:nrow(df_predict)) {
df_predict$I_025[i] <- quantile(
l_par$a1*(df_predict$w[i]/w_std)^l_par$b1, probs = 0.025)
df_predict$I_500[i] <- quantile(
l_par$a1*(df_predict$w[i]/w_std)^l_par$b1, probs = 0.500)
df_predict$I_975[i] <- quantile(
l_par$a1*(df_predict$w[i]/w_std)^l_par$b1, probs = 0.975)
df_predict$cv_025[i] <- quantile(
l_par$a2*exp(l_par$b2*(df_predict$w[i]-w_std)), probs = 0.025)
df_predict$cv_500[i] <- quantile(
l_par$a2*exp(l_par$b2*(df_predict$w[i]-w_std)), probs = 0.500)
df_predict$cv_975[i] <- quantile(
l_par$a2*exp(l_par$b2*(df_predict$w[i]-w_std)), probs = 0.975)
}p1 <- ggplot() +
geom_point(data = df_fit,
aes(x = w, y = intake), alpha = 0.2, color = "salmon") +
geom_ribbon(data = df_predict,
aes(x = w, ymin = I_025, ymax = I_975), fill = "salmon") +
geom_line(data = df_predict,
aes(x = w, y = I_500), color = "darkred") +
labs(x = "Weight (g)", y = "Intake (g/ind.)") +
theme_bw()
p2 <- ggplot() +
geom_ribbon(data = df_predict,
aes(x = w, ymin = cv_025, ymax = cv_975), fill = "salmon") +
geom_line(data = df_predict,
aes(x = w, y = cv_500), color = "darkred") +
geom_point(data = df_summary,
aes(x = median_w, y = cov)) +
labs(x = "Weight (g)", y = "Coefficient of variation") +
theme_bw()
plot_grid(p1, p2)ggplot() +
geom_point(data = df_fit,
aes(x = w, y = intake, color = w_rel), alpha = 0.5) +
geom_ribbon(data = df_predict,
aes(x = w, ymin = I_025, ymax = I_975), fill = "salmon") +
geom_line(data = df_predict,
aes(x = w, y = I_500), color = "darkred") +
labs(x = "Weight (g)", y = "Intake (g/ind.)",
color = "Relative\nweight") +
scale_color_gradient2(low = "red", mid = "white", high = "blue",
midpoint = 0) +
theme_bw() +
theme(
panel.background = element_rect(fill = "grey10"),
panel.grid = element_line(colour = "grey35")
)w_rel = 0).
w_re > 0 (are relatively larger than
their tank average), and the model estimates them to have relatively
higher intake (so they should show a positive bias)df_fit$intake_model <- 0.0
for (i in 1:I) {
df_fit$intake_model[i] <- median(
l_par$a1*exp(l_par$c1*df_fit$w_rel[i])*(((df_fit$w[i]/w_std)^l_par$b1))
)
}ggplot(df_fit) +
geom_point(aes(x = intake_model, y = intake, color = w_rel)) +
scale_color_gradient2(low = "red", mid = "white", high = "blue",
midpoint = 0) +
geom_abline(slope = 1, intercept = 0, linetype = "solid",
color = "gold") +
labs(
x = "Predicted intake (g/ind.)",
y = "Observed intake (g/ind.)",
color = "Relative\nweight") +
facet_wrap( ~ factor(days)) +
theme_bw() +
theme(
panel.background = element_rect(fill = "grey10"),
panel.grid = element_line(colour = "grey35")
)df_predict <- tibble(
w_rel = seq(from = min(df_fit$w_rel), to = max(df_fit$w_rel),
length.out = 100),
I_rel_025 = 0.0, I_rel_500 = 0.0, I_rel_975 = 0.0)
for (i in 1:nrow(df_predict)) {
df_predict$I_rel_025[i] <- quantile(
exp(l_par$c1*df_predict$w_rel[i]), probs = 0.025)
df_predict$I_rel_500[i] <- quantile(
exp(l_par$c1*df_predict$w_rel[i]), probs = 0.500)
df_predict$I_rel_975[i] <- quantile(
exp(l_par$c1*df_predict$w_rel[i]), probs = 0.975)
}
p1 <- ggplot(df_predict) +
geom_ribbon(aes(x = w_rel, ymin = I_rel_025, ymax = I_rel_975),
fill = "salmon") +
geom_line(aes(x = w_rel, y = I_rel_500)) +
xlim(-1,1) + # ylim(0,NA) +
labs(x = "Relative weight", y = "Relative intake") +
theme_bw()
p2 <- ggplot(df_fit) +
geom_histogram(aes(x = w_rel), fill = "salmon", color = "darkred") +
labs(x = "Relative weight", y = "Frequency") +
xlim(-1,1) +
theme_bw()
plot_grid(p1, p2, ncol = 1)## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
l_par <- rstan::extract(fit_n, pars = c("sigma_j", "RE_j"))
df_j <- tibble(j = 1:J)
df_j$delta <- apply(l_par$RE_j, MARGIN = 2, FUN = median)
df_j <- df_j %>% left_join(select(df_fit, j, w_rel, intake_model), by = "j")
ggplot(df_j) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_point(aes(x = delta, y = w_rel)) +
labs(x = "Individual-level deviation", y = "Relative weight") +
theme_bw() +
theme(panel.grid = element_blank())